From 67fcac7a3e41c7e81efc6b459deeb1282e7a4c58 Mon Sep 17 00:00:00 2001 From: "akw27@labyrinth.cl.cam.ac.uk" Date: Wed, 12 Jan 2005 09:23:50 +0000 Subject: [PATCH] bitkeeper revision 1.1159.219.1 (41e4eca6eauLGi5osqpDb_OmwLgatA) Some fixes and cleanups to the blktap code. --- .../drivers/xen/blktap/blktap.c | 4 +- .../drivers/xen/blktap/blktap.h | 133 ++-- .../drivers/xen/blktap/blktap_controlmsg.c | 202 +++++- .../drivers/xen/blktap/blktap_datapath.c | 673 ++++++++---------- .../drivers/xen/blktap/blktap_userdev.c | 225 +++++- 5 files changed, 744 insertions(+), 493 deletions(-) diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c index 5e7d47c58f..e4fbf390bc 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.c @@ -41,9 +41,9 @@ int __init xlblk_init(void) DPRINTK(" tap - Frontend connection init:\n"); active_reqs_init(); + blkif_interface_init(); + blkdev_schedule_init(); - ptfe_blkif.status = DISCONNECTED; - (void)ctrl_if_register_receiver(CMSG_BLKIF_BE, blkif_ctrlif_rx, CALLBACK_IN_BLOCKING_CONTEXT); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h index 7e5d73ddf7..2d67d592fc 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap.h @@ -23,23 +23,32 @@ #include #include #include +#include + +/* Used to signal to the backend that this is a tap domain. */ +#define BLKTAP_COOKIE 0xbeadfeed /* -------[ debug / pretty printing ]--------------------------------- */ #if 0 +#define DPRINTK(_f, _a...) printk(KERN_ALERT "(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) +#else +#define DPRINTK(_f, _a...) ((void)0) +#endif + +#if 1 #define ASSERT(_p) \ if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \ __LINE__, __FILE__); *(int*)0=0; } -#define DPRINTK(_f, _a...) 
printk(KERN_ALERT "(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) #else #define ASSERT(_p) ((void)0) -#define DPRINTK(_f, _a...) ((void)0) #endif #define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) -/* -------[ connection / request tracking ]--------------------------- */ + +/* -------[ connection tracking ]------------------------------------- */ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) #define VMALLOC_VMADDR(x) ((unsigned long)(x)) @@ -49,30 +58,40 @@ extern spinlock_t blkif_io_lock; typedef struct blkif_st { /* Unique identifier for this interface. */ - domid_t domid; - unsigned int handle; + domid_t domid; + unsigned int handle; /* Physical parameters of the comms window. */ - unsigned long shmem_frame; - unsigned int evtchn; - int irq; + unsigned long shmem_frame; + unsigned int evtchn; + int irq; /* Comms information. */ - blkif_ring_t *blk_ring_base; /* ioremap()'ed ptr to shmem_frame. */ - BLKIF_RING_IDX blk_req_cons; /* Request consumer. */ - BLKIF_RING_IDX blk_resp_prod; /* Private version of resp. producer. */ + blkif_back_ring_t blk_ring; enum { DISCONNECTED, DISCONNECTING, CONNECTED } status; /* * DISCONNECT response is deferred until pending requests are ack'ed. * We therefore need to store the id from the original request. 
- */ u8 disconnect_rspid; - struct blkif_st *hash_next; - struct list_head blkdev_list; - spinlock_t blk_ring_lock; - atomic_t refcnt; - + */ + u8 disconnect_rspid; + struct blkif_st *hash_next; + struct list_head blkdev_list; + spinlock_t blk_ring_lock; + atomic_t refcnt; struct work_struct work; } blkif_t; +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle); +void blkif_disconnect_complete(blkif_t *blkif); +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if ( atomic_dec_and_test(&(_b)->refcnt) ) \ + blkif_disconnect_complete(_b); \ + } while (0) + + +/* -------[ active request tracking ]--------------------------------- */ + typedef struct { blkif_t *blkif; unsigned long id; @@ -80,48 +99,16 @@ typedef struct { unsigned long mach_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned long virt_fas[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int next_free; + int inuse; /* debugging */ } active_req_t; +typedef unsigned int ACTIVE_RING_IDX; -/* -------[ block ring structs ]-------------------------------------- */ - -/* Types of ring. */ -#define BLKIF_REQ_RING_TYPE 1 -#define BLKIF_RSP_RING_TYPE 2 - -/* generic ring struct. */ -typedef struct blkif_generic_ring_struct { - int type; -} blkif_generic_ring_t; - -/* A requestor's view of a ring. */ -typedef struct blkif_req_ring_struct { - - int type; /* Will be BLKIF_REQ_RING_TYPE */ - BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */ - BLKIF_RING_IDX rsp_cons; /* Response consumer index */ - blkif_ring_t *ring; /* Pointer to shared ring struct */ - -} blkif_req_ring_t; - -#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 } - -/* A responder's view of a ring. 
*/ -typedef struct blkif_rsp_ring_struct { - - int type; - BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */ - BLKIF_RING_IDX req_cons; /* Request consumer index */ - blkif_ring_t *ring; /* Pointer to shared ring struct */ - -} blkif_rsp_ring_t; - -#define BLKIF_RSP_RING_INIT = { BLKIF_RSP_RING_TYPE, 0, 0, 0 } - -#define RING(a) (blkif_generic_ring_t *)(a) - -inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring); +active_req_t *lookup_active_req(ACTIVE_RING_IDX idx); +inline unsigned int ID_TO_IDX(unsigned long id); +inline domid_t ID_TO_DOM(unsigned long id); +inline void active_reqs_init(void); /* -------[ interposition -> character device interface ]------------- */ @@ -135,6 +122,7 @@ inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring); #define BLKTAP_IOCTL_KICK_FE 1 #define BLKTAP_IOCTL_KICK_BE 2 #define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_PRINT_IDXS 100 /* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ #define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ @@ -196,22 +184,12 @@ extern unsigned long mmap_vstart; #define RING_PAGES 128 extern unsigned long rings_vstart; -/* -------[ Here be globals ]----------------------------------------- */ +/* -------[ Here be globals ]----------------------------------------- */ extern unsigned long blktap_mode; - -/* blkif struct, containing ring to FE domain */ -extern blkif_t ptfe_blkif; - /* Connection to a single backend domain. */ -extern blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */ -extern BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */ -extern BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */ - -/* Rings up to user space. */ -extern blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT; -extern blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT; +extern blkif_front_ring_t blktap_be_ring; /* Event channel to backend domain. 
*/ extern unsigned int blkif_ptbe_evtchn; @@ -224,10 +202,13 @@ extern unsigned long blktap_ring_ok; /* init function for character device interface. */ int blktap_init(void); +/* init function for the blkif cache. */ +void __init blkif_interface_init(void); +void __init blkdev_schedule_init(void); +void blkif_deschedule(blkif_t *blkif); + /* interfaces to the char driver, passing messages to and from apps. */ void blktap_kick_user(void); -int blktap_write_to_ring(blkif_request_t *req); - /* user ring access functions: */ int blktap_write_fe_ring(blkif_request_t *req); @@ -235,11 +216,12 @@ int blktap_write_be_ring(blkif_response_t *rsp); int blktap_read_fe_ring(void); int blktap_read_be_ring(void); -/* and the helpers they call: */ -inline int write_resp_to_fe_ring(blkif_response_t *rsp); -inline void kick_fe_domain(void); +/* fe/be ring access functions: */ +int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp); +int write_req_to_be_ring(blkif_request_t *req); -inline int write_req_to_be_ring(blkif_request_t *req); +/* event notification functions */ +inline void kick_fe_domain(blkif_t *blkif); inline void kick_be_domain(void); /* Interrupt handlers. */ @@ -250,5 +232,8 @@ irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs); /* Control message receiver. 
*/ extern void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id); +/* debug */ +void print_vm_ring_idxs(void); + #define __BLKINT_H__ #endif diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c index a3d485a6f3..b3cd111897 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_controlmsg.c @@ -32,10 +32,71 @@ unsigned int blkif_ptbe_evtchn; /*-----[ Control Messages to/from Frontend VMs ]--------------------------*/ +#define BLKIF_HASHSZ 1024 +#define BLKIF_HASH(_d,_h) (((int)(_d)^(int)(_h))&(BLKIF_HASHSZ-1)) + +static kmem_cache_t *blkif_cachep; +static blkif_t *blkif_hash[BLKIF_HASHSZ]; + +blkif_t *blkif_find_by_handle(domid_t domid, unsigned int handle) +{ + blkif_t *blkif = blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif != NULL) && + ((blkif->domid != domid) || (blkif->handle != handle)) ) + blkif = blkif->hash_next; + return blkif; +} + +static void __blkif_disconnect_complete(void *arg) +{ + blkif_t *blkif = (blkif_t *)arg; + ctrl_msg_t cmsg; + blkif_be_disconnect_t disc; + + /* + * These can't be done in blkif_disconnect() because at that point there + * may be outstanding requests at the disc whose asynchronous responses + * must still be notified to the remote driver. + */ + unbind_evtchn_from_irq(blkif->evtchn); + vfree(blkif->blk_ring.sring); + + /* Construct the deferred response message. */ + cmsg.type = CMSG_BLKIF_BE; + cmsg.subtype = CMSG_BLKIF_BE_DISCONNECT; + cmsg.id = blkif->disconnect_rspid; + cmsg.length = sizeof(blkif_be_disconnect_t); + disc.domid = blkif->domid; + disc.blkif_handle = blkif->handle; + disc.status = BLKIF_BE_STATUS_OKAY; + memcpy(cmsg.msg, &disc, sizeof(disc)); + + /* + * Make sure message is constructed /before/ status change, because + * after the status change the 'blkif' structure could be deallocated at + * any time. 
Also make sure we send the response /after/ status change, + * as otherwise a subsequent CONNECT request could spuriously fail if + * another CPU doesn't see the status change yet. + */ + mb(); + if ( blkif->status != DISCONNECTING ) + BUG(); + blkif->status = DISCONNECTED; + mb(); + + /* Send the successful response. */ + ctrl_if_send_response(&cmsg); +} + +void blkif_disconnect_complete(blkif_t *blkif) +{ + INIT_WORK(&blkif->work, __blkif_disconnect_complete, (void *)blkif); + schedule_work(&blkif->work); +} void blkif_ptfe_create(blkif_be_create_t *create) { - blkif_t *blkif; + blkif_t *blkif, **pblkif; domid_t domid = create->domid; unsigned int handle = create->blkif_handle; @@ -43,16 +104,38 @@ void blkif_ptfe_create(blkif_be_create_t *create) /* May want to store info on the connecting domain here. */ DPRINTK("PT got BE_CREATE\n"); - blkif = &ptfe_blkif; /* for convenience if the hash is readded later. */ + + if ( (blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL)) == NULL ) + { + DPRINTK("Could not create blkif: out of memory\n"); + create->status = BLKIF_BE_STATUS_OUT_OF_MEMORY; + return; + } /* blkif struct init code from blkback.c */ memset(blkif, 0, sizeof(*blkif)); blkif->domid = domid; blkif->handle = handle; - blkif->status = DISCONNECTED; + blkif->status = DISCONNECTED; spin_lock_init(&blkif->blk_ring_lock); atomic_set(&blkif->refcnt, 0); + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( *pblkif != NULL ) + { + if ( ((*pblkif)->domid == domid) && ((*pblkif)->handle == handle) ) + { + DPRINTK("Could not create blkif: already exists\n"); + create->status = BLKIF_BE_STATUS_INTERFACE_EXISTS; + kmem_cache_free(blkif_cachep, blkif); + return; + } + pblkif = &(*pblkif)->hash_next; + } + + blkif->hash_next = *pblkif; + *pblkif = blkif; + create->status = BLKIF_BE_STATUS_OKAY; } @@ -61,24 +144,59 @@ void blkif_ptfe_destroy(blkif_be_destroy_t *destroy) { /* Clear anything that we initialized above. 
*/ + domid_t domid = destroy->domid; + unsigned int handle = destroy->blkif_handle; + blkif_t **pblkif, *blkif; + DPRINTK("PT got BE_DESTROY\n"); + + pblkif = &blkif_hash[BLKIF_HASH(domid, handle)]; + while ( (blkif = *pblkif) != NULL ) + { + if ( (blkif->domid == domid) && (blkif->handle == handle) ) + { + if ( blkif->status != DISCONNECTED ) + goto still_connected; + goto destroy; + } + pblkif = &blkif->hash_next; + } + + destroy->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + + still_connected: + destroy->status = BLKIF_BE_STATUS_INTERFACE_CONNECTED; + return; + + destroy: + *pblkif = blkif->hash_next; + kmem_cache_free(blkif_cachep, blkif); destroy->status = BLKIF_BE_STATUS_OKAY; } void blkif_ptfe_connect(blkif_be_connect_t *connect) { - domid_t domid = connect->domid; - /*unsigned int handle = connect->blkif_handle;*/ - unsigned int evtchn = connect->evtchn; - unsigned long shmem_frame = connect->shmem_frame; + domid_t domid = connect->domid; + unsigned int handle = connect->blkif_handle; + unsigned int evtchn = connect->evtchn; + unsigned long shmem_frame = connect->shmem_frame; struct vm_struct *vma; - pgprot_t prot; - int error; - blkif_t *blkif; + pgprot_t prot; + int error; + blkif_t *blkif; + blkif_sring_t *sring; DPRINTK("PT got BE_CONNECT\n"); - blkif = &ptfe_blkif; /* for convenience if the hash is readded later. 
*/ + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_connect attempted for non-existent blkif (%u,%u)\n", + connect->domid, connect->blkif_handle); + connect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return; + } if ( (vma = get_vm_area(PAGE_SIZE, VM_IOREMAP)) == NULL ) { @@ -112,30 +230,51 @@ void blkif_ptfe_connect(blkif_be_connect_t *connect) return; } + sring = (blkif_sring_t *)vma->addr; + SHARED_RING_INIT(BLKIF_RING, sring); + BACK_RING_INIT(BLKIF_RING, &blkif->blk_ring, sring); + blkif->evtchn = evtchn; blkif->irq = bind_evtchn_to_irq(evtchn); blkif->shmem_frame = shmem_frame; - blkif->blk_ring_base = (blkif_ring_t *)vma->addr; blkif->status = CONNECTED; - /*blkif_get(blkif);*/ + blkif_get(blkif); request_irq(blkif->irq, blkif_ptfe_int, 0, "blkif-pt-backend", blkif); connect->status = BLKIF_BE_STATUS_OKAY; } -void blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect) +int blkif_ptfe_disconnect(blkif_be_disconnect_t *disconnect, u8 rsp_id) { - /* - * don't actually set the passthrough to disconnected. - * We just act as a pipe, and defer to the real ends to handle things like - * recovery. - */ + domid_t domid = disconnect->domid; + unsigned int handle = disconnect->blkif_handle; + blkif_t *blkif; DPRINTK("PT got BE_DISCONNECT\n"); + + blkif = blkif_find_by_handle(domid, handle); + if ( unlikely(blkif == NULL) ) + { + DPRINTK("blkif_disconnect attempted for non-existent blkif" + " (%u,%u)\n", disconnect->domid, disconnect->blkif_handle); + disconnect->status = BLKIF_BE_STATUS_INTERFACE_NOT_FOUND; + return 1; /* Caller will send response error message. */ + } + + if ( blkif->status == CONNECTED ) + { + blkif->status = DISCONNECTING; + blkif->disconnect_rspid = rsp_id; + wmb(); /* Let other CPUs see the status change. */ + free_irq(blkif->irq, blkif); + blkif_deschedule(blkif); + blkif_put(blkif); + return 0; /* Caller should not send response message. 
*/ + } disconnect->status = BLKIF_BE_STATUS_OKAY; - return; + return 1; } /*-----[ Control Messages to/from Backend VM ]----------------------------*/ @@ -150,7 +289,7 @@ static void blkif_ptbe_send_interface_connect(void) }; blkif_fe_interface_connect_t *msg = (void*)cmsg.msg; msg->handle = 0; - msg->shmem_frame = virt_to_machine(blk_ptbe_ring) >> PAGE_SHIFT; + msg->shmem_frame = virt_to_machine(blktap_be_ring.sring) >> PAGE_SHIFT; ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE); } @@ -162,9 +301,11 @@ static void blkif_ptbe_close(void) /* Move from CLOSED to DISCONNECTED state. */ static void blkif_ptbe_disconnect(void) { - blk_ptbe_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL); - blk_ptbe_ring->req_prod = blk_ptbe_ring->resp_prod - = ptbe_resp_cons = ptbe_req_prod = 0; + blkif_sring_t *sring; + + sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL); + SHARED_RING_INIT(BLKIF_RING, sring); + FRONT_RING_INIT(BLKIF_RING, &blktap_be_ring, sring); blkif_pt_state = BLKIF_STATE_DISCONNECTED; DPRINTK("Blkif-Passthrough-BE is now DISCONNECTED.\n"); blkif_ptbe_send_interface_connect(); @@ -319,7 +460,9 @@ void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) case CMSG_BLKIF_BE_DISCONNECT: if ( msg->length != sizeof(blkif_be_disconnect_t) ) goto parse_error; - blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0]); + if ( !blkif_ptfe_disconnect((blkif_be_disconnect_t *)&msg->msg[0], + msg->id) ) + return; break; /* We just ignore anything to do with vbds for now. 
*/ @@ -356,3 +499,12 @@ void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) msg->length = 0; ctrl_if_send_response(msg); } + +/*-----[ All control messages enter here: ]-------------------------------*/ + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, NULL); + memset(blkif_hash, 0, sizeof(blkif_hash)); +} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c index c8733dc088..367a83cecc 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_datapath.c @@ -5,55 +5,46 @@ * Block request routing data path. * * Copyright (c) 2004, Andrew Warfield - * + * -- see full header in blktap.c */ #include "blktap.h" +#include /*-----[ The data paths ]-------------------------------------------------*/ - -/* Connections to the frontend domains.*/ -blkif_t ptfe_blkif; - -/* Connection to a single backend domain. */ -blkif_ring_t *blk_ptbe_ring; /* Ring from the PT to the BE dom */ -BLKIF_RING_IDX ptbe_resp_cons; /* Response consumer for comms ring. */ -BLKIF_RING_IDX ptbe_req_prod; /* Private request producer. */ -/* Rings up to user space. */ -blkif_req_ring_t fe_ring;// = BLKIF_REQ_RING_INIT; -blkif_rsp_ring_t be_ring;// = BLKIF_RSP_RING_INIT; - -/*-----[ Ring helpers ]---------------------------------------------------*/ - -inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring) -{ - if (ring->type == BLKIF_REQ_RING_TYPE) { - blkif_req_ring_t *r = (blkif_req_ring_t *)ring; - return ( ( r->req_prod - r->rsp_cons ) == BLKIF_RING_SIZE ); - } - - /* for now assume that there is always room in the response path. */ - return 0; -} +/* Connection to a single backend domain. 
*/ +blkif_front_ring_t blktap_be_ring; /*-----[ Tracking active requests ]---------------------------------------*/ /* this must be the same as MAX_PENDING_REQS in blkback.c */ -#define MAX_ACTIVE_REQS 64 +#define MAX_ACTIVE_REQS ((ACTIVE_RING_IDX)64U) -active_req_t active_reqs[MAX_ACTIVE_REQS]; -unsigned char active_req_ring[MAX_ACTIVE_REQS]; -spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; -typedef unsigned int ACTIVE_RING_IDX; -ACTIVE_RING_IDX active_prod, active_cons; +active_req_t active_reqs[MAX_ACTIVE_REQS]; +ACTIVE_RING_IDX active_req_ring[MAX_ACTIVE_REQS]; +spinlock_t active_req_lock = SPIN_LOCK_UNLOCKED; +ACTIVE_RING_IDX active_prod, active_cons; #define MASK_ACTIVE_IDX(_i) ((_i)&(MAX_ACTIVE_REQS-1)) #define ACTIVE_IDX(_ar) (_ar - active_reqs) +#define NR_ACTIVE_REQS (MAX_ACTIVE_REQS - active_prod + active_cons) inline active_req_t *get_active_req(void) { - ASSERT(active_cons != active_prod); - return &active_reqs[MASK_ACTIVE_IDX(active_cons++)]; + ACTIVE_RING_IDX idx; + active_req_t *ar; + unsigned long flags; + + ASSERT(active_cons != active_prod); + + spin_lock_irqsave(&active_req_lock, flags); + idx = active_req_ring[MASK_ACTIVE_IDX(active_cons++)]; + ar = &active_reqs[idx]; +if (ar->inuse) WPRINTK("AR INUSE! (%lu)\n", ar->id); +ar->inuse = 1; + spin_unlock_irqrestore(&active_req_lock, flags); + + return ar; } inline void free_active_req(active_req_t *ar) @@ -61,10 +52,16 @@ inline void free_active_req(active_req_t *ar) unsigned long flags; spin_lock_irqsave(&active_req_lock, flags); +ar->inuse = 0; active_req_ring[MASK_ACTIVE_IDX(active_prod++)] = ACTIVE_IDX(ar); spin_unlock_irqrestore(&active_req_lock, flags); } +active_req_t *lookup_active_req(ACTIVE_RING_IDX idx) +{ + return &active_reqs[idx]; +} + inline void active_reqs_init(void) { ACTIVE_RING_IDX i; @@ -76,55 +73,256 @@ inline void active_reqs_init(void) active_req_ring[i] = i; } +/* Requests passing through the tap to the backend hijack the id field + * in the request message. 
In it we put the AR index _AND_ the fe domid. + * the domid is used by the backend to map the pages properly. + */ + +static inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) +{ + return ( (fe_dom << 16) | idx ); +} + +inline unsigned int ID_TO_IDX(unsigned long id) +{ + return ( id & 0x0000ffff ); +} + +inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } + +/*-----[ Ring helpers ]---------------------------------------------------*/ + +inline int write_resp_to_fe_ring(blkif_t *blkif, blkif_response_t *rsp) +{ + blkif_response_t *resp_d; + active_req_t *ar; + + /* remap id, and free the active req. blkif lookup goes here too.*/ + ar = &active_reqs[ID_TO_IDX(rsp->id)]; + /* WPRINTK("%3u > %3lu\n", ID_TO_IDX(rsp->id), ar->id); */ + rsp->id = ar->id; + free_active_req(ar); + + resp_d = RING_GET_RESPONSE(BLKIF_RING, &blkif->blk_ring, + blkif->blk_ring.rsp_prod_pvt); + memcpy(resp_d, rsp, sizeof(blkif_response_t)); + wmb(); + blkif->blk_ring.rsp_prod_pvt++; + + return 0; +} + +inline int write_req_to_be_ring(blkif_request_t *req) +{ + blkif_request_t *req_d; + + req_d = RING_GET_REQUEST(BLKIF_RING, &blktap_be_ring, + blktap_be_ring.req_prod_pvt); + memcpy(req_d, req, sizeof(blkif_request_t)); + wmb(); + blktap_be_ring.req_prod_pvt++; + + return 0; +} + +inline void kick_fe_domain(blkif_t *blkif) +{ + RING_PUSH_RESPONSES(BLKIF_RING, &blkif->blk_ring); + notify_via_evtchn(blkif->evtchn); + DPRINTK("notified FE(dom %u)\n", blkif->domid); + +} + +inline void kick_be_domain(void) +{ + wmb(); /* Ensure that the frontend can see the requests. 
*/ + RING_PUSH_REQUESTS(BLKIF_RING, &blktap_be_ring); + notify_via_evtchn(blkif_ptbe_evtchn); + DPRINTK("notified BE\n"); +} + /*-----[ Data to/from Frontend (client) VMs ]-----------------------------*/ +/*-----[ Scheduler list maint -from blkback ]--- */ + +static struct list_head blkio_schedule_list; +static spinlock_t blkio_schedule_list_lock; + +static int __on_blkdev_list(blkif_t *blkif) +{ + return blkif->blkdev_list.next != NULL; +} + +static void remove_from_blkdev_list(blkif_t *blkif) +{ + unsigned long flags; + if ( !__on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( __on_blkdev_list(blkif) ) + { + list_del(&blkif->blkdev_list); + blkif->blkdev_list.next = NULL; + blkif_put(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + +static void add_to_blkdev_list_tail(blkif_t *blkif) +{ + unsigned long flags; + if ( __on_blkdev_list(blkif) ) return; + spin_lock_irqsave(&blkio_schedule_list_lock, flags); + if ( !__on_blkdev_list(blkif) && (blkif->status == CONNECTED) ) + { + list_add_tail(&blkif->blkdev_list, &blkio_schedule_list); + blkif_get(blkif); + } + spin_unlock_irqrestore(&blkio_schedule_list_lock, flags); +} + + +/*-----[ Scheduler functions - from blkback ]--- */ + +static DECLARE_WAIT_QUEUE_HEAD(blkio_schedule_wait); + +static int do_block_io_op(blkif_t *blkif, int max_to_do); + +static int blkio_schedule(void *arg) +{ + DECLARE_WAITQUEUE(wq, current); + + blkif_t *blkif; + struct list_head *ent; + + daemonize( + "xentapd" + ); + + for ( ; ; ) + { + /* Wait for work to do. */ + add_wait_queue(&blkio_schedule_wait, &wq); + set_current_state(TASK_INTERRUPTIBLE); + if ( (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) || + list_empty(&blkio_schedule_list) ) + schedule(); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&blkio_schedule_wait, &wq); + + /* Queue up a batch of requests. 
*/ + while ( (NR_ACTIVE_REQS < MAX_ACTIVE_REQS) && + !list_empty(&blkio_schedule_list) ) + { + ent = blkio_schedule_list.next; + blkif = list_entry(ent, blkif_t, blkdev_list); + blkif_get(blkif); + remove_from_blkdev_list(blkif); + if ( do_block_io_op(blkif, BATCH_PER_DOMAIN) ) + add_to_blkdev_list_tail(blkif); + blkif_put(blkif); + } + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + /* Push the batch through to disc. */ + run_task_queue(&tq_disk); +#endif + } +} + +static void maybe_trigger_blkio_schedule(void) +{ + /* + * Needed so that two processes, who together make the following predicate + * true, don't both read stale values and evaluate the predicate + * incorrectly. Incredibly unlikely to stall the scheduler on x86, but... + */ + smp_mb(); + + if ( (NR_ACTIVE_REQS < (MAX_ACTIVE_REQS)) && /* XXX!!! was M_A_R/2*/ + !list_empty(&blkio_schedule_list) ) + wake_up(&blkio_schedule_wait); +} + +void blkif_deschedule(blkif_t *blkif) +{ + remove_from_blkdev_list(blkif); +} + +void __init blkdev_schedule_init(void) +{ + spin_lock_init(&blkio_schedule_list_lock); + INIT_LIST_HEAD(&blkio_schedule_list); + + if ( kernel_thread(blkio_schedule, 0, CLONE_FS | CLONE_FILES) < 0 ) + BUG(); +} + +/*-----[ Interrupt entry from a frontend ]------ */ + irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_t *blkif = dev_id; + + add_to_blkdev_list_tail(blkif); + maybe_trigger_blkio_schedule(); + return IRQ_HANDLED; +} + +/*-----[ Other Frontend Ring functions ]-------- */ + +/* irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs)*/ +static int do_block_io_op(blkif_t *blkif, int max_to_do) { /* we have pending messages from the real frontend. 
*/ - blkif_request_t *req_s, *req_d; - BLKIF_RING_IDX fe_rp; + blkif_request_t *req_s; + RING_IDX i, rp; unsigned long flags; - int notify; - unsigned long i; active_req_t *ar; + int more_to_do = 0; + int notify_be = 0, notify_user = 0; DPRINTK("PT got FE interrupt.\n"); + + if (NR_ACTIVE_REQS == MAX_ACTIVE_REQS) return 1; /* lock both rings */ spin_lock_irqsave(&blkif_io_lock, flags); - /* While there are REQUESTS on FERing: */ - fe_rp = ptfe_blkif.blk_ring_base->req_prod; + rp = blkif->blk_ring.sring->req_prod; rmb(); - notify = (ptfe_blkif.blk_req_cons != fe_rp); - - for (i = ptfe_blkif.blk_req_cons; i != fe_rp; i++) { - - /* Get the next request */ - req_s = &ptfe_blkif.blk_ring_base->ring[MASK_BLKIF_IDX(i)].req; + + for ( i = blkif->blk_ring.req_cons; + (i != rp) && + !RING_REQUEST_CONS_OVERFLOW(BLKIF_RING, &blkif->blk_ring, i); + i++ ) + { + + if ((--max_to_do == 0) || (NR_ACTIVE_REQS == MAX_ACTIVE_REQS)) + { + more_to_do = 1; + break; + } + req_s = RING_GET_REQUEST(BLKIF_RING, &blkif->blk_ring, i); /* This is a new request: * Assign an active request record, and remap the id. */ ar = get_active_req(); ar->id = req_s->id; - req_s->id = ACTIVE_IDX(ar); - DPRINTK("%3lu < %3lu\n", req_s->id, ar->id); + ar->blkif = blkif; + req_s->id = MAKE_ID(blkif->domid, ACTIVE_IDX(ar)); + /* WPRINTK("%3u < %3lu\n", ID_TO_IDX(req_s->id), ar->id); */ /* FE -> BE interposition point is here. */ /* ------------------------------------------------------------- */ /* BLKIF_OP_PROBE_HACK: */ - /* Until we have grant tables, we need to allow the backent to */ - /* map pages that are either from this domain, or more commonly */ - /* from the real front end. We achieve this in a terrible way, */ - /* by passing the front end's domid allong with PROBE messages */ - /* Once grant tables appear, this should all go away. */ + /* Signal to the backend that we are a tap domain. 
*/ if (req_s->operation == BLKIF_OP_PROBE) { - DPRINTK("Adding FE domid to PROBE request.\n"); - (domid_t)(req_s->frame_and_sects[1]) = ptfe_blkif.domid; + DPRINTK("Adding BLKTAP_COOKIE to PROBE request.\n"); + req_s->frame_and_sects[1] = BLKTAP_COOKIE; } /* ------------------------------------------------------------- */ @@ -137,12 +335,9 @@ irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) /* In MODE_INTERCEPT_FE, map attached pages into the app vma */ /* In MODE_COPY_FE_PAGES, copy attached pages into the app vma */ - /* XXX: mapping/copying of attached pages is still not done! */ - DPRINTK("req->UFERing\n"); blktap_write_fe_ring(req_s); - - + notify_user = 1; } /* If we are not in MODE_INTERCEPT_FE or MODE_INTERCEPT_BE: */ @@ -153,61 +348,27 @@ irqreturn_t blkif_ptfe_int(int irq, void *dev_id, struct pt_regs *regs) /* copy the request message to the BERing */ DPRINTK("blktap: FERing[%u] -> BERing[%u]\n", - (unsigned)MASK_BLKIF_IDX(i), - (unsigned)MASK_BLKIF_IDX(ptbe_req_prod)); - - req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req; + (unsigned)__SHARED_RING_MASK(BLKIF_RING, + blktap_be_ring.sring, i), + (unsigned)__SHARED_RING_MASK(BLKIF_RING, + blktap_be_ring.sring, blktap_be_ring.req_prod_pvt)); - memcpy(req_d, req_s, sizeof(blkif_request_t)); - - ptbe_req_prod++; - } - } - - ptfe_blkif.blk_req_cons = i; - - /* If we have forwarded any responses, notify the appropriate ends. */ - if (notify) { - - /* we have sent stuff to the be, notify it. */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_BE)) ) { - wmb(); - blk_ptbe_ring->req_prod = ptbe_req_prod; - - notify_via_evtchn(blkif_ptbe_evtchn); - DPRINTK(" -- and notified.\n"); - } - - /* we sent stuff to the app, notify it. 
*/ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) || - (blktap_mode & BLKTAP_MODE_COPY_FE) ) { - - blktap_kick_user(); + write_req_to_be_ring(req_s); + notify_be = 1; } } + blkif->blk_ring.req_cons = i; + /* unlock rings */ spin_unlock_irqrestore(&blkif_io_lock, flags); - - return IRQ_HANDLED; -} - -inline int write_req_to_be_ring(blkif_request_t *req) -{ - blkif_request_t *req_d; - - req_d = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(ptbe_req_prod)].req; - memcpy(req_d, req, sizeof(blkif_request_t)); - ptbe_req_prod++; - - return 0; -} - -inline void kick_be_domain(void) { - wmb(); - blk_ptbe_ring->req_prod = ptbe_req_prod; - notify_via_evtchn(blkif_ptbe_evtchn); + + if (notify_user) + blktap_kick_user(); + if (notify_be) + kick_be_domain(); + + return more_to_do; } /*-----[ Data to/from Backend (server) VM ]------------------------------*/ @@ -216,31 +377,27 @@ inline void kick_be_domain(void) { irqreturn_t blkif_ptbe_int(int irq, void *dev_id, struct pt_regs *ptregs) { - blkif_response_t *resp_s, *resp_d; - BLKIF_RING_IDX be_rp; + blkif_response_t *resp_s; + blkif_t *blkif; + RING_IDX rp, i; unsigned long flags; - int notify; - unsigned long i; - active_req_t *ar; DPRINTK("PT got BE interrupt.\n"); /* lock both rings */ spin_lock_irqsave(&blkif_io_lock, flags); - /* While there are RESPONSES on BERing: */ - be_rp = blk_ptbe_ring->resp_prod; + rp = blktap_be_ring.sring->rsp_prod; rmb(); - notify = (ptbe_resp_cons != be_rp); - - for ( i = ptbe_resp_cons; i != be_rp; i++ ) + + for ( i = blktap_be_ring.rsp_cons; i != rp; i++) { - /* BE -> FE interposition point is here. */ + resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_be_ring, i); - /* Get the next response */ - resp_s = &blk_ptbe_ring->ring[MASK_BLKIF_IDX(i)].resp; + /* BE -> FE interposition point is here. 
*/ - + blkif = active_reqs[ID_TO_IDX(resp_s->id)].blkif; + /* If we are in MODE_INTERCEPT_BE or MODE_COPY_BE: */ if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || (blktap_mode & BLKTAP_MODE_COPY_BE) ) { @@ -249,10 +406,9 @@ irqreturn_t blkif_ptbe_int(int irq, void *dev_id, /* In MODE_INTERCEPT_BE, map attached pages into the app vma */ /* In MODE_COPY_BE_PAGES, copy attached pages into the app vma */ - /* XXX: copy/map the attached page! */ - DPRINTK("rsp->UBERing\n"); blktap_write_be_ring(resp_s); + blktap_kick_user(); } @@ -264,254 +420,49 @@ irqreturn_t blkif_ptbe_int(int irq, void *dev_id, /* Copy the response message to FERing */ DPRINTK("blktap: BERing[%u] -> FERing[%u]\n", - (unsigned) MASK_BLKIF_IDX(i), - (unsigned) MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)); - - /* remap id, and free the active req. blkif lookup goes here too.*/ - ar = &active_reqs[resp_s->id]; - DPRINTK("%3lu > %3lu\n", resp_s->id, ar->id); - resp_s->id = ar->id; - free_active_req(ar); - - resp_d = &ptfe_blkif.blk_ring_base->ring[ - MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp; - - memcpy(resp_d, resp_s, sizeof(blkif_response_t)); - - ptfe_blkif.blk_resp_prod++; + (unsigned)__SHARED_RING_MASK(BLKIF_RING, + blkif->blk_ring.sring, i), + (unsigned)__SHARED_RING_MASK(BLKIF_RING, + blkif->blk_ring.sring, + blkif->blk_ring.rsp_prod_pvt)); - } - } - - ptbe_resp_cons = i; - - /* If we have forwarded any responses, notify the apropriate domains. */ - if (notify) { + write_resp_to_fe_ring(blkif, resp_s); + kick_fe_domain(blkif); - /* we have sent stuff to the fe. notify it. */ - if ( !((blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_INTERCEPT_FE)) ) { - wmb(); - ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod; - - notify_via_evtchn(ptfe_blkif.evtchn); - DPRINTK(" -- and notified.\n"); - } - - /* we sent stuff to the app, notify it. 
*/ - if ( (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) || - (blktap_mode & BLKTAP_MODE_COPY_BE) ) { - - blktap_kick_user(); } } - - spin_unlock_irqrestore(&blkif_io_lock, flags); - return IRQ_HANDLED; -} - -inline int write_resp_to_fe_ring(blkif_response_t *rsp) -{ - blkif_response_t *resp_d; - active_req_t *ar; - /* remap id, and free the active req. blkif lookup goes here too.*/ - ar = &active_reqs[rsp->id]; - DPRINTK("%3lu > %3lu\n", rsp->id, ar->id); - rsp->id = ar->id; - free_active_req(ar); - - resp_d = &ptfe_blkif.blk_ring_base->ring[ - MASK_BLKIF_IDX(ptfe_blkif.blk_resp_prod)].resp; - - memcpy(resp_d, rsp, sizeof(blkif_response_t)); - ptfe_blkif.blk_resp_prod++; - - return 0; -} - -inline void kick_fe_domain(void) { - wmb(); - ptfe_blkif.blk_ring_base->resp_prod = ptfe_blkif.blk_resp_prod; - notify_via_evtchn(ptfe_blkif.evtchn); + blktap_be_ring.rsp_cons = i; -} - -static inline void flush_requests(void) -{ - wmb(); /* Ensure that the frontend can see the requests. */ - blk_ptbe_ring->req_prod = ptbe_req_prod; - notify_via_evtchn(blkif_ptbe_evtchn); -} - -/*-----[ Data to/from user space ]----------------------------------------*/ - - -int blktap_write_fe_ring(blkif_request_t *req) -{ - blkif_request_t *target; - int error, i; - - /* - * This is called to pass a request from the real frontend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: fe_ring not ready for a request!\n"); - return 0; - } - - if ( BLKTAP_RING_FULL(RING(&fe_ring)) ) { - DPRINTK("blktap: fe_ring is full, can't add.\n"); - return 0; - } - - target = &fe_ring.ring->ring[MASK_BLKIF_IDX(fe_ring.req_prod)].req; - memcpy(target, req, sizeof(*req)); - -/* maybe move this stuff out into a seperate func ------------------- */ - - /* - * For now, map attached page into a fixed position into the vma. - * XXX: make this map to a free page. 
- */ - - /* Attempt to map the foreign pages directly in to the application */ - for (i=0; i<target->nr_segments; i++) { - - /* get an unused virtual address from the char device */ - /* store the old page address */ - /* replace the address with the virtual address */ - - /* blktap_vma->vm_start+((2+i)*PAGE_SIZE) */ - - error = direct_remap_area_pages(blktap_vma->vm_mm, - MMAP_VADDR(req->id, i), - target->frame_and_sects[0] & PAGE_MASK, - PAGE_SIZE, - blktap_vma->vm_page_prot, - ptfe_blkif.domid); - if ( error != 0 ) { - printk(KERN_INFO "remapping attached page failed! (%d)\n", error); - return 0; - } - } - /* fix the address of the attached page in the message. */ - /* TODO: preserve the segment number stuff here... */ - /* target->frame_and_sects[0] = blktap_vma->vm_start + PAGE_SIZE;*/ -/* ------------------------------------------------------------------ */ + spin_unlock_irqrestore(&blkif_io_lock, flags); - fe_ring.req_prod++; - - return 0; -} - -int blktap_write_be_ring(blkif_response_t *rsp) -{ - blkif_response_t *target; - - /* - * This is called to pass a request from the real backend domain's - * blkif ring to the character device. - */ - - if ( ! blktap_ring_ok ) { - DPRINTK("blktap: be_ring not ready for a request!\n"); - return 0; - } - - if ( BLKTAP_RING_FULL(RING(&be_ring)) ) { - DPRINTK("blktap: be_ring is full, can't add.\n"); - return 0; - } - - target = &be_ring.ring->ring[MASK_BLKIF_IDX(be_ring.rsp_prod)].resp; - memcpy(target, rsp, sizeof(*rsp)); - - - /* XXX: map attached pages and fix-up addresses in the copied address. */ - - be_ring.rsp_prod++; - - return 0; + return IRQ_HANDLED; } -int blktap_read_fe_ring(void) -{ - /* This is called to read responses from the UFE ring.
*/ - - BLKIF_RING_IDX fe_rp; - unsigned long i; - int notify; - - DPRINTK("blktap_read_fe_ring()\n"); - - fe_rp = fe_ring.ring->resp_prod; - rmb(); - notify = (fe_rp != fe_ring.rsp_cons); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { - - /* for each outstanding message on the UFEring */ - for ( i = fe_ring.rsp_cons; i != fe_rp; i++ ) { +/* Debug : print the current ring indices. */ - /* XXX: remap pages on that message as necessary */ - /* copy the message to the UBEring */ - - DPRINTK("resp->fe_ring\n"); - write_resp_to_fe_ring(&fe_ring.ring->ring[MASK_BLKIF_IDX(i)].resp); - } - - fe_ring.rsp_cons = fe_rp; - - /* notify the fe if necessary */ - if ( notify ) { - DPRINTK("kick_fe_domain()\n"); - kick_fe_domain(); - } - } - - return 0; -} - -int blktap_read_be_ring(void) +void print_vm_ring_idxs(void) { - /* This is called to read responses from the UBE ring. */ - - BLKIF_RING_IDX be_rp; - unsigned long i; - int notify; - - DPRINTK("blktap_read_be_ring()\n"); - - be_rp = be_ring.ring->req_prod; - rmb(); - notify = (be_rp != be_ring.req_cons); - - /* if we are forwarding from UFERring to FERing */ - if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { - - /* for each outstanding message on the UFEring */ - for ( i = be_ring.req_cons; i != be_rp; i++ ) { - - /* XXX: remap pages on that message as necessary */ - /* copy the message to the UBEring */ - - DPRINTK("req->be_ring\n"); - write_req_to_be_ring(&be_ring.ring->ring[MASK_BLKIF_IDX(i)].req); - } - - be_ring.req_cons = be_rp; - - /* notify the fe if necessary */ - if ( notify ) { - DPRINTK("kick_be_domain()\n"); - kick_be_domain(); - } + int i; + blkif_t *blkif; + + WPRINTK("FE Rings: \n---------\n"); + for ( i = 0; i < 50; i++) { + blkif = blkif_find_by_handle((domid_t)i, 0); + if (blkif != NULL) + WPRINTK("%2d: req_cons: %2d, rsp_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", i, + blkif->blk_ring.req_cons, + blkif->blk_ring.rsp_prod_pvt, + 
blkif->blk_ring.sring->req_prod, + blkif->blk_ring.sring->rsp_prod); } - - return 0; -} + WPRINTK("BE Ring: \n--------\n"); + WPRINTK("BE: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_be_ring.rsp_cons, + blktap_be_ring.req_prod_pvt, + blktap_be_ring.sring->req_prod, + blktap_be_ring.sring->rsp_prod); +} diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c index c10e3f3a44..500270259c 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c +++ b/linux-2.6.10-xen-sparse/drivers/xen/blktap/blktap_userdev.c @@ -37,6 +37,10 @@ struct vm_area_struct *blktap_vma; unsigned long mmap_vstart; unsigned long rings_vstart; +/* Rings up to user space. */ +static blkif_front_ring_t blktap_ufe_ring; +static blkif_back_ring_t blktap_ube_ring; + /* -------[ blktap vm ops ]------------------------------------------- */ static struct page *blktap_nopage(struct vm_area_struct *vma, @@ -61,41 +65,39 @@ struct vm_operations_struct blktap_vm_ops = { static int blktap_open(struct inode *inode, struct file *filp) { + blkif_sring_t *sring; + if ( test_and_set_bit(0, &blktap_dev_inuse) ) return -EBUSY; printk(KERN_ALERT "blktap open.\n"); /* Allocate the fe ring. */ - fe_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL); - if (fe_ring.ring == NULL) + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) goto fail_nomem; - SetPageReserved(virt_to_page(fe_ring.ring)); + SetPageReserved(virt_to_page(sring)); - fe_ring.ring->req_prod = fe_ring.ring->resp_prod - = fe_ring.req_prod - = fe_ring.rsp_cons - = 0; + SHARED_RING_INIT(BLKIF_RING, sring); + FRONT_RING_INIT(BLKIF_RING, &blktap_ufe_ring, sring); /* Allocate the be ring. 
*/ - be_ring.ring = (blkif_ring_t *)get_zeroed_page(GFP_KERNEL); - if (be_ring.ring == NULL) + sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL); + if (sring == NULL) goto fail_free_fe; - SetPageReserved(virt_to_page(be_ring.ring)); + SetPageReserved(virt_to_page(sring)); - be_ring.ring->req_prod = be_ring.ring->resp_prod - = be_ring.rsp_prod - = be_ring.req_cons - = 0; + SHARED_RING_INIT(BLKIF_RING, sring); + BACK_RING_INIT(BLKIF_RING, &blktap_ube_ring, sring); DPRINTK(KERN_ALERT "blktap open.\n"); return 0; fail_free_fe: - free_page( (unsigned long) fe_ring.ring); + free_page( (unsigned long) blktap_ufe_ring.sring); fail_nomem: return -ENOMEM; @@ -109,11 +111,11 @@ static int blktap_release(struct inode *inode, struct file *filp) printk(KERN_ALERT "blktap closed.\n"); /* Free the ring page. */ - ClearPageReserved(virt_to_page(fe_ring.ring)); - free_page((unsigned long) fe_ring.ring); + ClearPageReserved(virt_to_page(blktap_ufe_ring.sring)); + free_page((unsigned long) blktap_ufe_ring.sring); - ClearPageReserved(virt_to_page(be_ring.ring)); - free_page((unsigned long) be_ring.ring); + ClearPageReserved(virt_to_page(blktap_ube_ring.sring)); + free_page((unsigned long) blktap_ube_ring.sring); return 0; } @@ -146,16 +148,18 @@ static int blktap_mmap(struct file *filp, struct vm_area_struct *vma) /* not sure if I really need to do this... 
*/ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - DPRINTK("Mapping be_ring page %lx.\n", __pa(be_ring.ring)); - if (remap_page_range(vma, vma->vm_start, __pa(be_ring.ring), PAGE_SIZE, - vma->vm_page_prot)) { - printk(KERN_ERR "be_ring: remap_page_range failure!\n"); + DPRINTK("Mapping be_ring page %lx.\n", __pa(blktap_ube_ring.sring)); + if (remap_page_range(vma, vma->vm_start, + __pa(blktap_ube_ring.sring), + PAGE_SIZE, vma->vm_page_prot)) { + WPRINTK("be_ring: remap_page_range failure!\n"); } - DPRINTK("Mapping fe_ring page %lx.\n", __pa(fe_ring.ring)); - if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, __pa(fe_ring.ring), + DPRINTK("Mapping fe_ring page %lx.\n", __pa(blktap_ufe_ring.sring)); + if (remap_page_range(vma, vma->vm_start + PAGE_SIZE, + __pa(blktap_ufe_ring.sring), PAGE_SIZE, vma->vm_page_prot)) { - printk(KERN_ERR "fe_ring: remap_page_range failure!\n"); + WPRINTK("fe_ring: remap_page_range failure!\n"); } blktap_vma = vma; @@ -181,7 +185,24 @@ static int blktap_ioctl(struct inode *inode, struct file *filp, printk(KERN_INFO "blktap: set mode to %lx\n", arg); return 0; } - /* XXX: return a more meaningful error case here. 
*/ + case BLKTAP_IOCTL_PRINT_IDXS: + { + print_vm_ring_idxs(); + WPRINTK("User Rings: \n-----------\n"); + WPRINTK("UF: rsp_cons: %2d, req_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_ufe_ring.rsp_cons, + blktap_ufe_ring.req_prod_pvt, + blktap_ufe_ring.sring->req_prod, + blktap_ufe_ring.sring->rsp_prod); + WPRINTK("UB: req_cons: %2d, rsp_prod_prv: %2d " + "| req_prod: %2d, rsp_prod: %2d\n", + blktap_ube_ring.req_cons, + blktap_ube_ring.rsp_prod_pvt, + blktap_ube_ring.sring->req_prod, + blktap_ube_ring.sring->rsp_prod); + + } } return -ENOIOCTLCMD; } @@ -190,11 +211,11 @@ static unsigned int blktap_poll(struct file *file, poll_table *wait) { poll_wait(file, &blktap_wait, wait); - if ( (fe_ring.req_prod != fe_ring.ring->req_prod) || - (be_ring.rsp_prod != be_ring.ring->resp_prod) ) { + if ( RING_HAS_UNPUSHED_REQUESTS(BLKIF_RING, &blktap_ufe_ring) || + RING_HAS_UNPUSHED_RESPONSES(BLKIF_RING, &blktap_ube_ring) ) { - fe_ring.ring->req_prod = fe_ring.req_prod; - be_ring.ring->resp_prod = be_ring.rsp_prod; + RING_PUSH_REQUESTS(BLKIF_RING, &blktap_ufe_ring); + RING_PUSH_RESPONSES(BLKIF_RING, &blktap_ube_ring); return POLLIN | POLLRDNORM; } @@ -215,7 +236,149 @@ static struct file_operations blktap_fops = { release: blktap_release, mmap: blktap_mmap, }; + +/*-----[ Data to/from user space ]----------------------------------------*/ + + +int blktap_write_fe_ring(blkif_request_t *req) +{ + blkif_request_t *target; + int error, i; + /* + * This is called to pass a request from the real frontend domain's + * blkif ring to the character device. + */ + + if ( ! 
blktap_ring_ok ) { + DPRINTK("blktap: ufe_ring not ready for a request!\n"); + return 0; + } + + if ( RING_FULL(BLKIF_RING, &blktap_ufe_ring) ) { + DPRINTK("blktap: fe_ring is full, can't add.\n"); + return 0; + } + + //target = RING_NEXT_EMPTY_REQUEST(BLKIF_RING, &blktap_ufe_ring); + target = RING_GET_REQUEST(BLKIF_RING, &blktap_ufe_ring, + blktap_ufe_ring.req_prod_pvt); + memcpy(target, req, sizeof(*req)); + + /* Attempt to map the foreign pages directly in to the application */ + for (i=0; i<target->nr_segments; i++) { + + error = direct_remap_area_pages(blktap_vma->vm_mm, + MMAP_VADDR(ID_TO_IDX(req->id), i), + target->frame_and_sects[0] & PAGE_MASK, + PAGE_SIZE, + blktap_vma->vm_page_prot, + ID_TO_DOM(req->id)); + if ( error != 0 ) { + printk(KERN_INFO "remapping attached page failed! (%d)\n", error); + /* the request is now dropped on the floor. */ + return 0; + } + } + + blktap_ufe_ring.req_prod_pvt++; + + return 0; +} + +int blktap_write_be_ring(blkif_response_t *rsp) +{ + blkif_response_t *target; + + /* + * This is called to pass a request from the real backend domain's + * blkif ring to the character device. + */ + + if ( ! blktap_ring_ok ) { + DPRINTK("blktap: be_ring not ready for a request!\n"); + return 0; + } + + /* No test for fullness in the response direction. */ + + //target = RING_NEXT_EMPTY_RESPONSE(BLKIF_RING, &blktap_ube_ring); + target = RING_GET_RESPONSE(BLKIF_RING, &blktap_ube_ring, + blktap_ube_ring.rsp_prod_pvt); + memcpy(target, rsp, sizeof(*rsp)); + + /* no mapping -- pages were mapped in blktap_write_fe_ring() */ + + blktap_ube_ring.rsp_prod_pvt++; + + return 0; +} + +int blktap_read_fe_ring(void) +{ + /* This is called to read responses from the UFE ring.
*/ + + RING_IDX i, rp; + blkif_response_t *resp_s; + blkif_t *blkif; + active_req_t *ar; + + DPRINTK("blktap_read_fe_ring()\n"); + + /* if we are forwarding from UFERring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_FE) { + + /* for each outstanding message on the UFEring */ + //RING_FOREACH_RESPONSE(BLKIF_RING, &blktap_ufe_ring, prod, resp_s) { + rp = blktap_ufe_ring.sring->rsp_prod; + rmb(); + + for ( i = blktap_ufe_ring.rsp_cons; i != rp; i++ ) + { + resp_s = RING_GET_RESPONSE(BLKIF_RING, &blktap_ufe_ring, i); + + DPRINTK("resp->fe_ring\n"); + ar = lookup_active_req(ID_TO_IDX(resp_s->id)); + blkif = ar->blkif; + write_resp_to_fe_ring(blkif, resp_s); + kick_fe_domain(blkif); + } + + blktap_ufe_ring.rsp_cons = i; + } + return 0; +} + +int blktap_read_be_ring(void) +{ + /* This is called to read requests from the UBE ring. */ + + RING_IDX i, rp; + blkif_request_t *req_s; + + DPRINTK("blktap_read_be_ring()\n"); + + /* if we are forwarding from UFERring to FERing */ + if (blktap_mode & BLKTAP_MODE_INTERCEPT_BE) { + + /* for each outstanding message on the UFEring */ + //RING_FOREACH_REQUEST(BLKIF_RING, &blktap_ube_ring, prod, req_s) { + rp = blktap_ube_ring.sring->req_prod; + rmb(); + for ( i = blktap_ube_ring.req_cons; i != rp; i++ ) + { + req_s = RING_GET_REQUEST(BLKIF_RING, &blktap_ube_ring, i); + + DPRINTK("req->be_ring\n"); + write_req_to_be_ring(req_s); + kick_be_domain(); + } + + blktap_ube_ring.req_cons = i; + } + + return 0; +} /* -------[ blktap module setup ]------------------------------------- */ static struct miscdevice blktap_miscdev = { -- 2.30.2